import requests
from jsonpath import jsonpath
from openpyxl import workbook
# Endpoint requested by get_data(); the query parameters are supplied per call.
url='https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list'


# wb.save('new_tencent.xlsx')  -- saves the workbook to disk (done in save_data)


def get_data(arguments):
    """Request the news list endpoint and return the decoded JSON body.

    Args:
        arguments: Mapping of query-string parameters, forwarded unchanged
            to ``requests.get`` via ``params``.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Present a browser User-Agent to disguise the crawler.
    header = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0'
    }
    # requests applies no timeout by default, so a stalled connection would
    # block the whole script indefinitely without this.
    result = requests.get(url, headers=header, params=arguments, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    result.raise_for_status()
    return result.json()
def parase_data(data):
    """Extract titles, media names and links from a decoded response.

    Each field is collected with a recursive-descent JSONPath query, so
    every ``title``, ``media_name`` and ``url`` key anywhere in ``data``
    is picked up.

    Args:
        data: The decoded JSON structure to search.

    Returns:
        A tuple ``(new_titles, new_names, new_links)`` of three lists. A
        field with no matches yields an empty list.
    """
    # jsonpath() returns False (not an empty list) when nothing matches;
    # normalise to [] so callers can always iterate or zip the results.
    new_titles = jsonpath(data, '$..title') or []
    new_names = jsonpath(data, '$..media_name') or []
    new_links = jsonpath(data, '$..url') or []
    return new_titles, new_names, new_links

def save_data(new_titles,new_names,new_links):
    """Append one worksheet row per (title, media name, link) and save.

    The three sequences are paired positionally with ``zip``, so rows stop
    at the shortest one. Uses the module-level ``ws`` and ``wb`` objects
    and writes the workbook to ``new_tencent.xlsx`` on every call.
    """
    rows = zip(new_titles, new_names, new_links)
    for row in rows:
        ws.append(list(row))
    wb.save('new_tencent.xlsx')

if __name__=='__main__':
    # Create the workbook. save_data() reads ``wb`` and ``ws`` as module
    # globals, so they must be assigned here at module level.
    wb = workbook.Workbook()
    ws = wb.active  # the workbook's active worksheet
    ws.append(['标题', '媒体', '链接'])  # header row: title, media, link

    page_size = 20
    # Store the first 10 pages.
    for page in range(10):
        arguments = {
            'sub_srv_id': "24hours",
            'srv_id': 'pc',
            # Paging parameter. Advance by a full page each time: stepping
            # by 1 with a limit of 20 would re-fetch 19 of the same items
            # on every request. NOTE(review): assumes offset is an item
            # index, per the usual offset/limit convention -- confirm.
            'offset': str(page * page_size),
            'limit': str(page_size),
            'strategy': '1',
            'ext': '{"pool":["top","hot"],"is_filter":7,"check_type":true}'
        }
        data = get_data(arguments)
        new_titles, new_names, new_links = parase_data(data)
        save_data(new_titles, new_names, new_links)